{
 "metadata": {
  "description": "Extracting features and visualizing trained filters with an example image, viewed layer-by-layer.",
  "example_name": "Filter visualization",
  "include_in_docs": true,
  "priority": 2,
  "signature": "sha256:297f9ccd64cb6c8acfe0e34cd4b4ccd05e172d55704063125b585445ca8302d1"
 },
 "nbformat": 3,
 "nbformat_minor": 0,
 "worksheets": [
  {
   "cells": [
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Here we visualize filters and outputs using the network architecture proposed by Krizhevsky et al. for ImageNet and implemented in `caffe`.\n",
      "\n",
      "(This page follows DeCAF visualizations originally by Yangqing Jia.)"
     ]
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "First, import required modules and set plotting parameters"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "import numpy as np\n",
      "import scipy\n",
      "# scipy.ndimage.zoom is called further down; a bare `import scipy` does not\n",
      "# reliably load subpackages, so import the subpackage explicitly.\n",
      "import scipy.ndimage\n",
      "import matplotlib.pyplot as plt\n",
      "import cv2\n",
      "%matplotlib inline\n",
      "\n",
      "# Make sure that caffe is on the python path:\n",
      "caffe_root = '../'  # this file is expected to be in {caffe_root}/examples\n",
      "import sys\n",
      "sys.path.insert(0, caffe_root + 'python')\n",
      "\n",
      "import caffe\n",
      "\n",
      "# Plot defaults: 10x10 figures, no pixel interpolation, grayscale colormap.\n",
      "plt.rcParams['figure.figsize'] = (10, 10)\n",
      "plt.rcParams['image.interpolation'] = 'nearest'\n",
      "plt.rcParams['image.cmap'] = 'gray'"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Load the net, specify test phase and GPU mode, and configure input preprocessing. This notebook loads a driving model from a local path; to use the pretrained CaffeNet model instead, run `./scripts/download_model_binary.py models/bvlc_reference_caffenet` and point the paths below at it."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# Network definition and trained weights. Alternative snapshots in the same\n",
      "# directory (previously listed here as commented-out arguments):\n",
      "#   caffe_brody_train_gc_iter_300000.caffemodel\n",
      "#   caffe_brody_train_iter_530000.caffemodel\n",
      "model_def = '/deep/u/willsong/caffe/models/brody/deploy.prototxt'\n",
      "model_weights = '/deep/u/willsong/caffe/models/brody/caffe_driving_iter_10000.caffemodel'\n",
      "net = caffe.Classifier(model_def, model_weights)\n",
      "net.set_phase_test()\n",
      "net.set_mode_gpu()\n",
      "\n",
      "# input preprocessing: 'data' is the name of the input blob == net.inputs[0]\n",
      "mean_file = '/deep/u/willsong/caffe/python/driving_mean.npy'\n",
      "net.set_mean('data', np.load(mean_file))  # mean loaded from driving_mean.npy\n",
      "net.set_raw_scale('data', 255)  # the model operates on images in [0,255] range instead of [0,1]\n",
      "net.set_channel_swap('data', (2,1,0))  # the model has channels in BGR order instead of RGB"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Run a classification pass"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# load one frame and push it through the network\n",
      "image_path = '/deep/group/driving_data/andriluka/IMAGES/driving_data_q50_data/all_extracted/4-3-14-gilroy-split_0_from_gilroy_c2/4-3-14-gilroy-split_0_from_gilroy_c2_001541.jpeg'\n",
      "net.ff([caffe.io.load_image(image_path)])"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "The layer features and their shapes (10 is the batch size, corresponding to the ten subcrops used by Krizhevsky et al.)"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# (blob name, shape of its data array) for every blob in the net\n",
      "[(name, blob.data.shape) for name, blob in net.blobs.items()]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "The parameters and their shapes (each of these layers also has biases which are omitted here)"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# (layer name, shape of the first parameter array) for every parameterized layer\n",
      "[(name, layer_params[0].data.shape) for name, layer_params in net.params.items()]"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Helper functions for visualization"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# take an array of shape (n, height, width) or (n, height, width, channels)\n",
      "#  and visualize each (height, width) thing in a grid of size approx. sqrt(n) by sqrt(n)\n",
      "def vis_square(data, padsize=1, padval=0):\n",
      "    \"\"\"Tile a stack of maps into one roughly square image and show it.\n",
      "\n",
      "    data: array of shape (n, height, width) or (n, height, width, channels).\n",
      "    padsize: number of padding pixels appended below and to the right of each tile.\n",
      "    padval: value of the padding and of the unused tiles (the data itself is\n",
      "        rescaled to [0, 1] before padding).\n",
      "\n",
      "    The caller's array is never modified.\n",
      "    \"\"\"\n",
      "    # Rescale into a new array. In-place `-=` / `/=` would write through to the\n",
      "    # caller's array, and callers pass views of network parameters and blobs.\n",
      "    lo = data.min()\n",
      "    span = data.max() - lo\n",
      "    if span == 0:\n",
      "        span = 1  # constant input: avoid 0/0, every value maps to 0\n",
      "    data = (data - lo) / float(span)\n",
      "    \n",
      "    # force the number of filters to be square\n",
      "    n = int(np.ceil(np.sqrt(data.shape[0])))\n",
      "    padding = ((0, n ** 2 - data.shape[0]), (0, padsize), (0, padsize)) + ((0, 0),) * (data.ndim - 3)\n",
      "    data = np.pad(data, padding, mode='constant', constant_values=(padval, padval))\n",
      "    \n",
      "    # tile the filters into an image: (n*n, h, w, ...) -> (n, h, n, w, ...) -> (n*h, n*w, ...)\n",
      "    data = data.reshape((n, n) + data.shape[1:]).transpose((0, 2, 1, 3) + tuple(range(4, data.ndim + 1)))\n",
      "    data = data.reshape((n * data.shape[1], n * data.shape[3]) + data.shape[4:])\n",
      "    \n",
      "    plt.imshow(data)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "The input image"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# index four is the center crop\n",
      "center_crop = net.blobs['data'].data[4]\n",
      "# undo the input preprocessing so the crop can be displayed as an image\n",
      "plt.imshow(net.deprocess('data', center_crop))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "The first layer filters, `conv1`"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# the parameters are a list of [weights, biases]; take the weights\n",
      "filters = net.params['conv1'][0].data\n",
      "# move axis 1 to the end to match vis_square's (n, height, width, channels) layout\n",
      "vis_square(np.transpose(filters, (0, 2, 3, 1)))"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "The first layer output, `conv1` (rectified responses of the filters above, channels 36 through 81 only)"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# channels 36..81 of the conv1 output for batch item 4\n",
      "conv1_out = net.blobs['conv1'].data\n",
      "feat = conv1_out[4, 36:82]\n",
      "vis_square(feat, padval=1)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "The second layer filters, `conv2`\n",
      "\n",
      "There are 256 filters, each of which has dimension 5 x 5 x 48. We show only the first 48 filters, with each channel shown separately, so that each filter is a row."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "filters = net.params['conv2'][0].data\n",
      "# first 48 filters, flattened so that every (filter, channel) pair becomes one 5x5 tile\n",
      "conv2_tiles = np.reshape(filters[:48], (48**2, 5, 5))\n",
      "vis_square(conv2_tiles)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "The second layer output, `conv2` (rectified, only the first 36 of 256 channels)"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# first 36 channels of the conv2 output for batch item 4\n",
      "conv2_out = net.blobs['conv2'].data\n",
      "feat = conv2_out[4, :36]\n",
      "vis_square(feat, padval=1)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "The third layer output, `conv3` (rectified, all 384 channels)"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# every channel of the conv3 output for batch item 4\n",
      "conv3_out = net.blobs['conv3'].data\n",
      "feat = conv3_out[4]\n",
      "vis_square(feat, padval=0.5)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "The fourth layer output, `conv4` (rectified, all 384 channels)"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# every channel of the conv4 output for batch item 4\n",
      "conv4_out = net.blobs['conv4'].data\n",
      "feat = conv4_out[4]\n",
      "vis_square(feat, padval=0.5)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "The fifth layer output, `conv5` (rectified, all 256 channels)"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# every channel of the conv5 output for batch item 4\n",
      "conv5_out = net.blobs['conv5'].data\n",
      "feat = conv5_out[4]\n",
      "vis_square(feat, padval=0.5)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "The fifth layer after pooling, `pool5`"
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# every channel of the pool5 output for batch item 4\n",
      "pool5_out = net.blobs['pool5'].data\n",
      "feat = pool5_out[4]\n",
      "vis_square(feat, padval=1)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Compute the pixel level confidence of presence of cars."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "feat = net.blobs['pixel-prob'].data[4]\n",
      "image = net.deprocess('data', net.blobs['data'].data[4])\n",
      "\n",
      "# Each of the 15x20 output cells carries 16 values laid out as a 4x4 patch;\n",
      "# unfold them into a single 60x80 map: mask[4*y + i, 4*x + j] = feat[4*i + j, y, x]\n",
      "mask = np.empty((60, 80))\n",
      "patches = feat[:, :15, :20].reshape((4, 4, 15, 20))\n",
      "mask[...] = patches.transpose((2, 0, 3, 1)).reshape((60, 80))\n",
      "\n",
      "# order-0 zoom by a factor of 8: 60x80 -> 480x640\n",
      "zoomed_mask = scipy.ndimage.zoom(mask, 8, order=0)\n",
      "\n",
      "# add the map to channel 0 of the channel-first view, clamp to [0, 1],\n",
      "# then return to (height, width, channels) for display\n",
      "masked_image = image.transpose((2, 0, 1))\n",
      "masked_image[0, :, :] += zoomed_mask\n",
      "masked_image = np.clip(masked_image, 0, 1).transpose((1, 2, 0))\n",
      "plt.imshow(masked_image)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Extract bounding boxes."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# np.round(mask + 0.3) is 1 where mask lies between about 0.2 and 1.2,\n",
      "# i.e. a threshold near 0.2 if mask holds values in [0, 1]\n",
      "hard_mask = np.round(mask + 0.3)\n",
      "feat = net.blobs['bb-output'].data[4]\n",
      "\n",
      "# unpack the 4 coordinate planes (16 channels each, one 4x4 patch per output cell) into 60x80 maps\n",
      "bb = np.empty((4, 60, 80))\n",
      "for y in range(15):\n",
      "    for x in range(20):\n",
      "        for c in range(4):\n",
      "            bb[c, y*4:(y+1)*4, x*4:(x+1)*4] = feat[c*16:(c+1)*16, y, x].reshape((4, 4))\n",
      "\n",
      "# zero the predictions wherever the thresholded mask is 0\n",
      "for c in range(4):\n",
      "    bb[c, :, :] *= hard_mask\n",
      "    \n",
      "# shift every prediction by the image position of its map pixel (stride 8):\n",
      "# planes 0 and 2 get the x position, planes 1 and 3 the y position\n",
      "y_offset = np.array([np.arange(0, 480, 8)]).T\n",
      "y_offset = np.tile(y_offset, (1, 80))\n",
      "x_offset = np.arange(0, 640, 8)\n",
      "x_offset = np.tile(x_offset, (60, 1))\n",
      "bb[0, :, :] += x_offset\n",
      "bb[2, :, :] += x_offset\n",
      "bb[1, :, :] += y_offset\n",
      "bb[3, :, :] += y_offset\n",
      "\n",
      "# one candidate row (plane 0..3) per selected pixel\n",
      "selected_rects = hard_mask > 0\n",
      "num_rects = np.sum(selected_rects)\n",
      "rects = np.empty((num_rects, 4))\n",
      "for i in range(4):\n",
      "    rects[:, i] = bb[i, selected_rects]\n",
      "# keep rows with positive extent, then turn columns 2 and 3 into width and height\n",
      "rects = rects[np.logical_and((rects[:, 2] - rects[:, 0]) > 0, (rects[:, 3] - rects[:, 1]) > 0), :]\n",
      "rects[:, (2, 3)] -= rects[:, (0, 1)]\n",
      "rects = np.clip(rects, 0, 640)\n",
      "# groupRectangles works on integer rectangles, so round explicitly instead of\n",
      "# relying on the binding to convert float rows\n",
      "rects = np.rint(rects).astype(np.int32).tolist()\n",
      "rects, scores = cv2.groupRectangles(rects, 4, 0.5)\n",
      "# single-argument print(...) behaves the same under Python 2 and Python 3\n",
      "print(rects)\n",
      "print(scores)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Output distribution of bounding box labels."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# unpack the raw bb-output values into 60x80 maps again, this time without position offsets\n",
      "bb = np.empty((4, 60, 80))\n",
      "for y in range(15):\n",
      "    for x in range(20):\n",
      "        for c in range(4):\n",
      "            bb[c, y*4:(y+1)*4, x*4:(x+1)*4] = feat[c*16:(c+1)*16, y, x].reshape((4, 4))\n",
      "for c in range(4):\n",
      "    bb[c, :, :] *= hard_mask\n",
      "\n",
      "# three stacked panels: raw outputs, masked outputs, histogram of the non-zero masked outputs\n",
      "plt.subplot(3, 1, 1)\n",
      "plt.plot(feat.flat)\n",
      "plt.subplot(3, 1, 2)\n",
      "plt.plot(bb.flat)\n",
      "# third panel; selecting panel 2 a second time would put the histogram on the previous plot\n",
      "plt.subplot(3, 1, 3)\n",
      "_ = plt.hist(bb[bb != 0], bins=100)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Function for drawing rectangles in images."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "def draw_rects(image, rects):\n",
      "    \"\"\"Draw the outline of each rectangle into channel 1 of `image`, in place.\n",
      "\n",
      "    image: array indexed as (row, column, channel); it is modified and also returned.\n",
      "    rects: rows of (xmin, ymin, width, height) as an (n, 4) array, a sequence of\n",
      "        such rows, or an empty sequence (nothing is drawn).\n",
      "    \"\"\"\n",
      "    # Normalize the input: an empty result may arrive as a plain tuple with no\n",
      "    # .shape, and non-integer values cannot be used as slice bounds.\n",
      "    boxes = np.asarray(rects).reshape(-1, 4).astype(int)\n",
      "    for xmin, ymin, w, h in boxes:\n",
      "        xmax = xmin + w\n",
      "        ymax = ymin + h\n",
      "        # two-pixel-thick edges: left, right, top, bottom\n",
      "        image[ymin:ymax+1, xmin:xmin+2, 1] = 1\n",
      "        image[ymin:ymax+1, xmax:xmax+2, 1] = 1\n",
      "        image[ymin:ymin+2, xmin:xmax+1, 1] = 1\n",
      "        image[ymax:ymax+2, xmin:xmax+1, 1] = 1\n",
      "    return image"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "markdown",
     "metadata": {},
     "source": [
      "Draw both confidence and bounding boxes."
     ]
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [
      "# draw on a copy so that masked_image itself stays unchanged\n",
      "boxed_image = draw_rects(np.copy(masked_image), rects)\n",
      "plt.imshow(boxed_image)"
     ],
     "language": "python",
     "metadata": {},
     "outputs": []
    },
    {
     "cell_type": "code",
     "collapsed": false,
     "input": [],
     "language": "python",
     "metadata": {},
     "outputs": []
    }
   ],
   "metadata": {}
  }
 ]
}